#### "How do past repression and indoctrination affect redistributive preferences?" ####
# authors: "Pelke, Lars"
# date: 2019-10-23
# written under "R version 3.6.0 (2019-03-11)"

#### Preliminaries ####

# Show the version string of the running R build (auto-printed at top level).
R.version[["version.string"]]

# Empty the workspace: remove every object returned by ls().
rm(list = ls())

# set working directory

# loading packages

library(countrycode)
library(tidyverse)
library(viridis)
library(scales)
library(readstata13)

#### Import Data ####

# Load the two EVS Stata files from data/EVS/:
#   evs     <- ZA4804 (labelled "EVS Wave 1" to "EVS Wave 4" further down)
#   evs2017 <- ZA7500 (labelled "EVS Wave 5" further down)
evs <- readstata13::read.dta13("data/EVS/ZA4804_v3-1-0.dta")
evs2017 <- readstata13::read.dta13("data/EVS/ZA7500_v3-0-0.dta")


#### Reduce Datasets ####

# Keep only the items that are recoded below.
evs <- evs %>%
  select(X001, X002, X003, X011, X025, X028, X047, X045, E035, E037, S020, S003, S025, S002)

summary(evs$S002)

# evs2017 uses its own variable names. select(new = old) picks the matching
# items and renames them to the codes used in `evs` in one step; the column
# order is the order listed here.
evs2017 <- evs2017 %>%
  select(X001 = v225,
         X002 = v226,
         X003 = age,
         X011 = v239_r,
         X025 = v243_edulvlb_1,
         X028 = v253,
         X047 = v261,
         E035 = v106,
         E037 = v103,
         S020 = year,
         S003 = country,
         S025 = cntry_y) %>%
  # `evs` carries social class as X045, but no counterpart is selected from
  # evs2017. Add an all-NA placeholder under the same name (it was previously
  # misspelled E045) so both frames share the same item codes.
  mutate(X045 = NA,
         S002 = "Wave 2018-2018")
summary(evs2017)
head(evs2017)

#### Prepare EVS (both) Variables for Empirical Analysis ####

# Sex: 0 = male, 1 = female; every other value (including NA) becomes NA.
# The label spelling differs between the files ("Male"/"Female" in evs,
# "male"/"female" in evs2017).
evs <- evs %>%
  mutate(sex = case_when(X001 == "Male" ~ 0,
                         X001 == "Female" ~ 1))
table(evs$sex)
sum(is.na(evs$sex))

evs2017 <- evs2017 %>%
  mutate(sex = case_when(X001 == "male" ~ 0,
                         X001 == "female" ~ 1))
table(evs2017$sex)
sum(is.na(evs2017$sex))


# Age: copy of X003 with negative values set to NA.
evs <- mutate(evs, age = ifelse(X003 < 0, NA, X003))
summary(evs$age)
sum(is.na(evs$age))

# Same rule for evs2017 (X003 is the renamed `age` item there).
evs2017 <- mutate(evs2017, age = ifelse(X003 < 0, NA, X003))
summary(evs2017$age)
sum(is.na(evs2017$age))

# Education
# `education` holds the level codes of X025 with the non-substantive answer
# labels set to NA. as.integer() on a factor returns its level codes, which is
# what the former nested ifelse() produced implicitly (assumes X025 is a factor
# as delivered by read.dta13 -- TODO confirm).
# The "Don't know" label is written with a non-ASCII apostrophe whose bytes
# depend on the encoding of this file and of the data. It is therefore matched
# byte-wise with a pattern that accepts any 1-3 byte apostrophe, instead of
# comparing against one hard-coded (and easily mangled) character.
summary(evs$X025)
evs <- evs %>%
  mutate(
    education = if_else(
      X025 %in% c("Missing; Unknown", "Not asked in survey", "No answer") |
        grepl("^Don.{1,3}t know$", as.character(X025), useBytes = TRUE),
      NA_integer_,
      as.integer(X025)
    )
  )
table(evs$education)
sum(is.na(evs$education))

# Three-level education: codes up to 7 -> 1, 8 to 11 -> 2, above 11 -> 3.
# NA stays NA because no condition matches.
evs <- evs %>%
  mutate(education_3 = case_when(education <= 7 ~ 1,
                                 education <= 11 ~ 2,
                                 education > 11 ~ 3))
summary(evs$education_3)


# evs2017: same approach with that file's own labels and cut points.
summary(evs2017$X025)
evs2017 <- evs2017 %>%
  mutate(
    education = if_else(
      X025 %in% c("other", "no answer", "dont know"),
      NA_integer_,
      as.integer(X025)
    )
  )
table(evs2017$education)
sum(is.na(evs2017$education))

# Three-level education: codes up to 9 -> 1, 10 to 11 -> 2, above 11 -> 3.
evs2017 <- evs2017 %>%
  mutate(education_3 = case_when(education <= 9 ~ 1,
                                 education <= 11 ~ 2,
                                 education > 11 ~ 3))
summary(evs2017$education_3)

# Birth year: copy of X002 with negative values set to NA.
evs <- mutate(evs, birth_year = ifelse(X002 < 0, NA, X002))
summary(evs$birth_year)
sum(is.na(evs$birth_year))

evs2017 <- mutate(evs2017, birth_year = ifelse(X002 < 0, NA, X002))
summary(evs2017$birth_year)
sum(is.na(evs2017$birth_year))

# Income Deciles
# Level codes of X047 with the non-substantive answer labels set to NA
# (as.integer() of a factor = level codes; assumes X047 is a factor -- TODO
# confirm). "Don't know" is matched byte-wise with a pattern that accepts any
# 1-3 byte apostrophe, because the non-ASCII apostrophe in that label does not
# survive encoding changes of this file or of the data.

evs <- evs %>%
  mutate(
    income_deciles = if_else(
      X047 %in% c("Missing; Unknown", "Not asked in survey",
                  "Not applicable", "No answer") |
        grepl("^Don.{1,3}t know$", as.character(X047), useBytes = TRUE),
      NA_integer_,
      as.integer(X047)
    )
  )
table(evs$income_deciles)
# Map the observed code range linearly onto 1..11, then fold the top value 11
# into 10 so the variable ends at 10.
evs$income_deciles <- rescale(evs$income_deciles, to = c(1, 11))
evs <- evs %>%
  mutate(income_deciles = ifelse(income_deciles == 11, 10, income_deciles))
sum(is.na(evs$income_deciles))

# evs2017: same approach with that file's labels; the observed code range is
# mapped directly onto 1..10.
evs2017 <- evs2017 %>%
  mutate(
    income_deciles = if_else(
      X047 %in% c("multiple answers Mail", "no answer", "dont know"),
      NA_integer_,
      as.integer(X047)
    )
  )
table(evs2017$income_deciles)
evs2017$income_deciles <- rescale(evs2017$income_deciles, to = c(1, 10))
sum(is.na(evs2017$income_deciles))


# Income quintiles: deciles 1-2 -> 1, 3-4 -> 2, 5-6 -> 3, 7-8 -> 4, 9-10 -> 5,
# i.e. ceiling(decile / 2).
# Any value that is not exactly one of 1..10 (including NA) becomes NA. The
# former nested ifelse() fell through to the raw X047 column for such values,
# which would have put factor codes into the quintile variable.
evs <- evs %>%
  mutate(income_quintiles = if_else(income_deciles %in% 1:10,
                                    ceiling(income_deciles / 2),
                                    NA_real_))
summary(evs$income_quintiles)
sum(is.na(evs$income_quintiles))

evs2017 <- evs2017 %>%
  mutate(income_quintiles = if_else(income_deciles %in% 1:10,
                                    ceiling(income_deciles / 2),
                                    NA_real_))
summary(evs2017$income_quintiles)
sum(is.na(evs2017$income_quintiles))


# Social Class
# Level codes of X045 with the non-substantive answer labels set to NA
# (as.integer() of a factor = level codes; assumes X045 is a factor -- TODO
# confirm). "Don't know" is matched byte-wise with a pattern that accepts any
# 1-3 byte apostrophe, because the non-ASCII apostrophe in that label does not
# survive encoding changes of this file or of the data.

evs <- evs %>%
  mutate(
    social_class = if_else(
      X045 %in% c("Missing; Unknown", "Not asked in survey",
                  "Not applicable", "No answer") |
        grepl("^Don.{1,3}t know$", as.character(X045), useBytes = TRUE),
      NA_integer_,
      as.integer(X045)
    )
  )

table(evs$social_class)
# Map the observed code range linearly onto 1..5.
evs$social_class <- rescale(evs$social_class, to = c(1, 5))
sum(is.na(evs$social_class))

# Children: binary indicator, 0 = no child, 1 = at least one child.
# evs: X011 is compared against its value labels. The non-substantive labels
# become NA; "Don't know" is matched byte-wise with a pattern that accepts any
# 1-3 byte apostrophe, because the non-ASCII apostrophe in that label does not
# survive encoding changes of this file or of the data.
evs <- evs %>%
  mutate(children = case_when(
    X011 %in% c("Missing; Unknown", "Not asked in survey",
                "Not applicable", "No answer") |
      grepl("^Don.{1,3}t know$", as.character(X011), useBytes = TRUE) ~ NA_real_,
    X011 == "No child" ~ 0,
    !is.na(X011) ~ 1
  ))

table(evs$children)

# evs2017: X011 is compared as a number. Every negative code is treated as
# missing, the same rule this script applies to E035 and E037 in evs2017.
# Previously only -2 and -1 were set to NA, so any other negative code would
# have been counted as "has children".
# NOTE(review): assumes all negative codes in this item are missing codes --
# confirm against the EVS 2017 codebook.
evs2017 <- evs2017 %>%
  mutate(children = case_when(X011 < 0 ~ NA_real_,
                              X011 == 0 ~ 0,
                              X011 > 0 ~ 1))

table(evs2017$children)

# Unemployed: binary indicator, 1 = unemployed, 0 = any other substantive
# answer, NA = non-substantive answer or missing.
# evs: "Don't know" is matched byte-wise with a pattern that accepts any 1-3
# byte apostrophe, because the non-ASCII apostrophe in that label does not
# survive encoding changes of this file or of the data.
evs <- evs %>%
  mutate(unemployed = case_when(
    X028 %in% c("Missing; Unknown", "Not asked in survey",
                "Not applicable", "No answer") |
      grepl("^Don.{1,3}t know$", as.character(X028), useBytes = TRUE) ~ NA_real_,
    X028 == "Unemployed" ~ 1,
    !is.na(X028) ~ 0
  ))
summary(evs$unemployed)

# evs2017: the former check compared against "dont know " (with a trailing
# blank), unlike the "dont know" used for the other evs2017 items, so those
# respondents ended up coded 0. Labels are now trimmed before matching, which
# works whether or not the label itself carries the blank. The duplicated
# "no answer" test is dropped.
evs2017 <- evs2017 %>%
  mutate(unemployed = case_when(
    trimws(as.character(X028)) %in%
      c("multiple answers Mail", "not applicable", "no answer", "dont know") ~ NA_real_,
    X028 == "unemployed" ~ 1,
    !is.na(X028) ~ 0
  ))
summary(evs2017$unemployed)

#### Mutating Dependent Variables ####

# income_equality (item E035)
# evs: level codes of E035 with the non-substantive answer labels set to NA
# (as.integer() of a factor = level codes; assumes E035 is a factor in evs --
# TODO confirm). "Don't know" is matched byte-wise with a pattern that accepts
# any 1-3 byte apostrophe, because the non-ASCII apostrophe in that label does
# not survive encoding changes of this file or of the data.
evs <- evs %>%
  mutate(
    income_equality = if_else(
      E035 %in% c("Missing; Unknown", "Not asked in survey",
                  "No answer", "Not applicable") |
        grepl("^Don.{1,3}t know$", as.character(E035), useBytes = TRUE),
      NA_integer_,
      as.integer(E035)
    )
  )
table(evs$income_equality)
# Map the observed code range linearly onto 1..10.
evs$income_equality <- rescale(evs$income_equality, to = c(1, 10))
table(evs$income_equality)
sum(is.na(evs$income_equality))

# evs2017: E035 is compared as a number; negative values become NA.
evs2017 <- evs2017 %>%
  mutate(income_equality = ifelse(E035 < 0, NA, E035))
table(evs2017$income_equality)
sum(is.na(evs2017$income_equality))


# government_resp (item E037)
# evs: level codes of E037 with the non-substantive answer labels set to NA
# (as.integer() of a factor = level codes; assumes E037 is a factor in evs --
# TODO confirm). "Don't know" is matched byte-wise with a pattern that accepts
# any 1-3 byte apostrophe, because the non-ASCII apostrophe in that label does
# not survive encoding changes of this file or of the data.
evs <- evs %>%
  mutate(
    government_resp = if_else(
      E037 %in% c("Missing; Unknown", "Not asked in survey",
                  "No answer", "Not applicable") |
        grepl("^Don.{1,3}t know$", as.character(E037), useBytes = TRUE),
      NA_integer_,
      as.integer(E037)
    )
  )
table(evs$government_resp)
# Map the observed code range linearly onto 1..10.
evs$government_resp <- rescale(evs$government_resp, to = c(1, 10))
table(evs$government_resp)
sum(is.na(evs$government_resp))

# evs2017: E037 is compared as a number; negative values become NA.
evs2017 <- evs2017 %>%
  mutate(government_resp = ifelse(E037 < 0, NA, E037))
table(evs2017$government_resp)
sum(is.na(evs2017$government_resp))


## Reverse income_equality so that 1 becomes 10, 2 becomes 9, ..., 10 becomes 1.
## Values that are exactly 1..10 are mapped to 11 - value; anything else
## (including NA) is left as it is. government_resp is only tabulated here,
## it is not recoded.
evs <- evs %>%
  mutate(income_equality = case_when(
    income_equality %in% 1:10 ~ 11 - income_equality,
    TRUE ~ income_equality
  ))


table(evs$government_resp)
table(evs$income_equality)

# Convert to numeric first so both branches of the recode have the same type.
evs2017$income_equality <- as.numeric(evs2017$income_equality)
evs2017 <- evs2017 %>%
  mutate(income_equality = case_when(
    income_equality %in% 1:10 ~ 11 - income_equality,
    TRUE ~ income_equality
  ))


table(evs2017$government_resp)
table(evs2017$income_equality)


#### Country Codes to EVS Country Items ####

# Translate the country names in S003 into ISO numeric country codes.
evs[["iso3n"]] <- countrycode(
  sourcevar = evs[["S003"]],
  origin = "country.name",
  destination = "iso3n",
  warn = TRUE
)
# Manual overrides for two entries that are set by hand.
# NOTE(review): 826 is presumably the United Kingdom code and 1100 a
# project-specific placeholder for Kosovo -- confirm.
evs[["iso3n"]] <- replace(evs[["iso3n"]], evs[["S003"]] == "Northern Ireland", 826)
evs[["iso3n"]] <- replace(evs[["iso3n"]], evs[["S003"]] == "Kosovo", 1100)

# evs2017 gets no manual overrides.
evs2017[["iso3n"]] <- countrycode(
  sourcevar = evs2017[["S003"]],
  origin = "country.name",
  destination = "iso3n",
  warn = TRUE
)

# keep_listed_years(): returns, for each element of `x`, the matching entry of
# `valid` as a number, or NA when `x` is not one of the listed years. match()
# compares factors through their labels, so this works for numeric and for
# factor input alike.
keep_listed_years <- function(x, valid) {
  valid[match(x, valid)]
}

# Survey year: only the fieldwork years listed here are kept, any other value
# becomes NA. This replaces a case_when() that mapped each year onto itself and
# listed 2000 twice, so its second 2000 branch could never be reached.
table(evs$S020)
evs <- evs %>%
  mutate(
    year = keep_listed_years(
      S020,
      as.numeric(c(1981:1984, 1990:1993, 1999:2001, 2008:2009))
    ),
    # Wave label derived from the year; years outside all ranges stay NA.
    wave = case_when(between(year, 1981, 1984) ~ "EVS Wave 1",
                     between(year, 1990, 1993) ~ "EVS Wave 2",
                     between(year, 1999, 2001) ~ "EVS Wave 3",
                     between(year, 2008, 2009) ~ "EVS Wave 4")
  )

table(evs$wave)

summary(evs2017$S020)
evs2017 <- evs2017 %>%
  mutate(year = keep_listed_years(S020, as.numeric(2017:2019)))
summary(evs2017$year)

evs2017 <- evs2017 %>%
  mutate(wave = case_when(between(year, 2017, 2019) ~ "EVS Wave 5"))
table(evs2017$wave)

#### Combine EVS Wave 1-4 and Wave 5 ####

# Reduce both frames to the harmonised variables. evs2017 has no social_class
# column, so bind_rows() fills it with NA for those rows.
evs <- dplyr::select(
  evs,
  iso3n, year, wave, sex, age, education, education_3, birth_year,
  income_deciles, income_quintiles, social_class, children, unemployed,
  income_equality, government_resp
)

evs2017 <- dplyr::select(
  evs2017,
  iso3n, year, wave, sex, age, education, education_3, birth_year,
  income_deciles, income_quintiles, children, unemployed,
  income_equality, government_resp
)

# Stack the rows of evs on top of the rows of evs2017.
evsdata <- bind_rows(evs, evs2017)

#### Age and Cohorts EVS ####

## cohort of respondents ##

# cohort is a plain copy of the birth year.
evsdata <- evsdata %>%
  mutate(cohort = birth_year)
summary(evsdata$birth_year)


# 5-year birth cohorts: left-closed bins [1885, 1890), ..., [2015, 2020), each
# labelled with its first year and then converted from factor to number.
# Birth years outside 1885-2019 (and NA) give NA.
evsdata <- evsdata %>%
  mutate(
    cohort_5 = cut(cohort,
                   breaks = seq(1885, 2020, by = 5),
                   right = FALSE,
                   labels = seq(1885, 2015, by = 5)),
    cohort_5 = as.numeric(as.character(cohort_5)),
    # first cohort year plus 15 years of socialization
    cohortmatch5_15 = cohort_5 + 15,
    # first cohort year plus 20 years of socialization
    cohortmatch5_20 = cohort_5 + 20
  )

table(evsdata$cohort_5)
table(evsdata$cohortmatch5_15)
table(evsdata$cohortmatch5_20)

#### Generate Dataset ID ####

# Tag every row with the name of the source dataset.
evsdata <- mutate(evsdata, data = "EVS")
table(evsdata$data)

#################################################################################################################
#################################################################################################################

#### RECODING OF VARIABLES FOR HARMONIZATION OF DATASETS ####

# Sex: no recoding necessary (0: men, 1: women)

# Social Class: reverse the 1-5 scale (1 <-> 5, 2 <-> 4, 3 stays 3) so that it
# runs from 1 (low) to 5 (high/upper). Values that are not exactly 1..5
# (including NA) are left as they are.

table(evsdata$social_class)
sum(is.na(evsdata$social_class))

evsdata <- evsdata %>%
  mutate(social_class = case_when(
    social_class %in% 1:5 ~ 6 - social_class,
    TRUE ~ social_class
  ))

table(evsdata$social_class)
sum(is.na(evsdata$social_class))

# Education category: into 3-three scale: done above at education_3

# unemployed: already done (binary scale)

# income: already done 

# children: already done (binary scale)

#### Rescale Dependent Variables ####

# income_equality: map the observed range linearly onto 0..100

table(evsdata$income_equality)

evsdata <- evsdata %>%
  mutate(income_equality = scales::rescale(income_equality, to = c(0, 100)))
table(evsdata$income_equality)


# government_resp: map the observed range linearly onto 0..100

table(evsdata$government_resp)
evsdata <- evsdata %>%
  mutate(government_resp = scales::rescale(government_resp, to = c(0, 100)))
table(evsdata$government_resp)

#### SAVE DATASET ####

# Fix the final column set and order (the helper column `cohort` is dropped),
# then write the prepared data to disk.
evsdata <- dplyr::select(
  evsdata,
  data, iso3n, year, wave, sex, age, education, education_3, birth_year,
  income_deciles, income_quintiles, social_class, children, unemployed,
  income_equality, government_resp, cohort_5, cohortmatch5_15, cohortmatch5_20
)

saveRDS(evsdata, file = "data/evsdata_prepared.rds")





